#!/usr/bin/env python
# SPDX-License-Identifier: MIT
# Copyright (C) 2021 Even Rouault

""" This script merges the GRIB2 tables from DEGRIB, which are in the
    frmts/grib/degrib/data directory, with the ones from WMO.
"""

import csv
import os
import re

import requests

# Location of the WMO GRIB2 tables: GitHub repository, pinned tag, and the
# base URL from which the raw CSV files of that tag are downloaded
wmo_github_repo = "wmo-im/GRIB2"
wmo_github_tag = "v30"
wmo_base_url = f"https://raw.githubusercontent.com/{wmo_github_repo}/{wmo_github_tag}"

# Directory that contains this script (symbolic links resolved)
script_dir_name = os.path.dirname(os.path.realpath(__file__))

# Input directory with degrib tables, located next to this script
degrib_table_input_dir = os.path.join(script_dir_name, "data")
assert os.path.exists(degrib_table_input_dir)

# Output directory of merged tables (${repository_root}/data); the repository
# root is three directory levels above the directory of this script
_repository_root = os.path.dirname(
    os.path.dirname(os.path.dirname(script_dir_name))
)
output_dir = os.path.join(_repository_root, "data")
assert os.path.exists(output_dir)

# Record which version of each component went into the merged tables
with open(os.path.join(output_dir, "grib2_table_versions.csv"), "wt") as f:
    f.writelines(
        [
            "component,version\n",
            f"wmo,{wmo_github_tag}\n",
            "degrib,2.25\n",
        ]
    )

# Resolve the commit SHA-1 of the tag the tables are downloaded from, so that
# the banner written into the generated files names the revision actually
# used (and not whatever HEAD of the default branch happens to be).
# The "sha" media type asks the GitHub API to return the bare SHA-1 as body.
r = requests.get(
    f"https://api.github.com/repos/{wmo_github_repo}"
    f"/commits/tags/{wmo_github_tag}",
    headers={"Accept": "application/vnd.github.VERSION.sha"},
)
# Fail early instead of embedding an API error body in every generated file
r.raise_for_status()
wmo_table_sha1 = r.content.decode("utf-8").strip()

# Two-line banner inserted at the top of each generated table
warning_msg = "DO NOT MODIFY THIS FILE. It is generated by frmts/grib/degrib/merge_degrib_and_wmo_tables.py"
warning2_msg = (
    f"from tables at version https://github.com/{wmo_github_repo}"
    f"/commit/{wmo_table_sha1}"
)
# Width of the "#" rulers that frame the banner
warning_len = max(len(warning_msg), len(warning2_msg))

# Processes table 4.5 "Fixed surface types and units"


def process_table_4_5():
    """Merge the degrib and WMO versions of table 4.5 and write the result.

    Reads grib2_table_4_5.csv from the degrib input directory, downloads the
    WMO code table 4.5 CSV file, and writes the merged table as
    grib2_table_4_5.csv in the output directory. The output starts with four
    banner rows (codes -4 to -1) carrying the "do not modify" warning.

    degrib entries have precedence since they contain short_name. A WMO entry
    is used only when degrib has no entry for the code, or when the degrib
    entry is "Reserved" while the WMO one is not.

    Raises:
        requests.HTTPError: if the WMO table cannot be downloaded.
        AssertionError: if a CSV header is not the expected one, or if a code
            is defined more than once within a table.
    """

    # Ingest table from degrib; the file is closed as soon as it is read
    degrib_path = os.path.join(degrib_table_input_dir, "grib2_table_4_5.csv")
    records = [None] * 256
    with open(degrib_path, newline="") as degrib_file:
        degrib_csv_reader = csv.DictReader(degrib_file)
        fieldnames = degrib_csv_reader.fieldnames
        assert fieldnames == ["code", "short_name", "name", "unit"]
        for row in degrib_csv_reader:
            code = int(row["code"])
            assert records[code] is None
            row["code"] = code
            records[code] = row

    # Ingest table from WMO
    r = requests.get(wmo_base_url + "/GRIB2_CodeFlag_4_5_CodeTable_en.csv")
    # Report a failed download as such rather than as a CSV header mismatch
    r.raise_for_status()
    lines = r.content.decode("utf-8").split("\n")
    wmo_csv_reader = csv.DictReader(lines)
    assert wmo_csv_reader.fieldnames == [
        "Title_en",
        "SubTitle_en",
        "CodeFlag",
        "Value",
        "MeaningParameterDescription_en",
        "Note_en",
        "noteIDs",
        "UnitComments_en",
        "Status",
    ], wmo_csv_reader.fieldnames
    wmo_records = [None] * 256
    for row in wmo_csv_reader:
        code_flag = row["CodeFlag"]
        if "-" in code_flag:
            # Deal with ranges (reserved codes)
            min_val, max_val = code_flag.split("-")
            min_val = int(min_val)
            max_val = int(max_val)
            assert min_val < max_val
        else:
            min_val = max_val = int(code_flag)
        for code in range(min_val, max_val + 1):
            assert wmo_records[code] is None
            wmo_records[code] = {
                "code": code,
                "name": row["MeaningParameterDescription_en"],
                "unit": row["UnitComments_en"],
            }

    # Merge records from WMO into degrib
    # degrib ones have the precedence when they exist since they contain
    # short_name.
    for code, wmo_record in enumerate(wmo_records):
        if wmo_record is None:
            continue
        degrib_record = records[code]
        if degrib_record is None or (
            "Reserved" in degrib_record["name"]
            and "Reserved" not in wmo_record["name"]
        ):
            unit = wmo_record["unit"]
            # An empty WMO unit is written as "-"
            unit = "-" if unit == "" else unit
            records[code] = {
                "code": code,
                "short_name": "",
                "name": wmo_record["name"],
                "unit": unit,
            }

    # Write the final file
    with open(
        os.path.join(output_dir, "grib2_table_4_5.csv"), "w", newline=""
    ) as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=fieldnames,
            quoting=csv.QUOTE_NONNUMERIC,
        )
        writer.writeheader()
        # Banner: the two warning lines framed by two rulers of "#"
        ruler = "#" * warning_len
        banner = ((-4, ruler), (-3, warning_msg), (-2, warning2_msg), (-1, ruler))
        for banner_code, text in banner:
            writer.writerow(
                {"code": banner_code, "short_name": text, "name": "#", "unit": "#"}
            )
        for row in records:
            # Skip codes defined by neither source, as done for table 4.2
            if row is not None:
                writer.writerow(row)


# Generate the merged table 4.5 when the script is run
process_table_4_5()


# Fetch an auxiliary table referenced by table 4.2 through a
# "(Code table 4.X)" mention in the UnitComments_en column
def get_auxiliary_table(num):
    """Download WMO code table 4.<num> and return it flattened as a string.

    Args:
        num: table number X of "Code table 4.X", inserted as is in the URL.

    Returns:
        The rows of the table formatted as "CodeFlag=Meaning" and joined
        with "; ". An empty string if the table has no row.

    Raises:
        requests.HTTPError: if the table cannot be downloaded.
        AssertionError: if the CSV header is not the expected one.
    """
    url = wmo_base_url + f"/GRIB2_CodeFlag_4_{num}_CodeTable_en.csv"
    print(f"Fetching {url}...")
    r = requests.get(url)
    # Report a failed download as such rather than as a CSV header mismatch
    r.raise_for_status()
    lines = r.content.decode("utf-8").split("\n")
    wmo_csv_reader = csv.DictReader(lines)
    assert wmo_csv_reader.fieldnames == [
        "Title_en",
        "SubTitle_en",
        "CodeFlag",
        "Value",
        "MeaningParameterDescription_en",
        "Note_en",
        "noteIDs",
        "UnitComments_en",
        "Status",
    ], wmo_csv_reader.fieldnames
    return "; ".join(
        row["CodeFlag"] + "=" + row["MeaningParameterDescription_en"]
        for row in wmo_csv_reader
    )


# Processes table 4.2 "Parameter Number by Product discipline and Parameter category"


def process_table_4_2():
    """Merge the degrib and WMO versions of table 4.2 and write the results.

    Downloads the WMO code table 4.2 CSV file, which covers all product
    disciplines and parameter categories, and splits it into one subtable
    grib2_table_4_2_{discipline}_{category}.csv per (discipline, category).
    Each subtable is merged with the degrib file of the same name when it
    exists in the degrib input directory, then written to the output
    directory, preceded by four banner rows (subcat -4 to -1).

    degrib entries have precedence, except when the degrib entry is
    "Reserved" while the WMO one is not. When the WMO unit refers to
    "(Code table 4.X)", the unit is replaced by the flattened content of that
    auxiliary table, including for entries coming from degrib.

    Raises:
        requests.HTTPError: if a WMO table cannot be downloaded.
        AssertionError: if a CSV header is not the expected one, if a
            SubTitle_en value does not contain the two expected numbers, or
            if a code is defined more than once within a subtable.
    """

    # Fetch the WMO table, which contains the full table for all product
    # disciplines and parameter category
    url = wmo_base_url + "/GRIB2_CodeFlag_4_2_CodeTable_en.csv"
    print(f"Fetching {url}...")
    r = requests.get(url)
    # Report a failed download as such rather than as a CSV header mismatch
    r.raise_for_status()
    lines = r.content.decode("utf-8").split("\n")
    wmo_csv_reader = csv.DictReader(lines)
    assert wmo_csv_reader.fieldnames == [
        "Title_en",
        "SubTitle_en",
        "CodeFlag",
        "Value",
        "MeaningParameterDescription_en",
        "Note_en",
        "noteIDs",
        "UnitComments_en",
        "Status",
    ], wmo_csv_reader.fieldnames

    # dict_tables has as key a subtable (by product discipline and category)
    # and as value a table of 256 entries with the subcategory as index
    dict_tables = {}

    # Flattened auxiliary tables already fetched, indexed by table number
    auxiliary_tables = {}

    for row in wmo_csv_reader:

        # Split values
        # "Product discipline 0 - Meteorological products, parameter category 1: moisture"
        # as product_type = 0 and category = 1
        subtitle_match = re.match(
            r".+?([0-9]+).+?([0-9]+).+", row["SubTitle_en"]
        )
        assert subtitle_match is not None, row
        product_type = int(subtitle_match.group(1))
        category = int(subtitle_match.group(2))

        # Instantiate final table
        table_name = f"grib2_table_4_2_{product_type}_{category}.csv"
        if table_name not in dict_tables:
            dict_tables[table_name] = [None] * 256
        table = dict_tables[table_name]

        code_flag = row["CodeFlag"]
        if "-" in code_flag:
            # Deal with ranges of codes
            min_val, max_val = code_flag.split("-")
            min_val = int(min_val)
            max_val = int(max_val)
            assert min_val < max_val
        else:
            min_val = max_val = int(code_flag)

        name = row["MeaningParameterDescription_en"]
        unit = row["UnitComments_en"]

        # A unit starting with "(Code table 4.X)" is replaced by the content
        # of that table, each auxiliary table being fetched only once
        aux_match = re.match(r"\(Code table 4\.([0-9]+)\)", unit)
        override_unit = aux_match is not None
        if override_unit:
            auxiliary_table_num = aux_match.group(1)
            if auxiliary_table_num not in auxiliary_tables:
                auxiliary_tables[auxiliary_table_num] = get_auxiliary_table(
                    auxiliary_table_num
                )
            unit = auxiliary_tables[auxiliary_table_num]

        for code in range(min_val, max_val + 1):
            assert table[code] is None, (table_name, code, table[code], row)
            table[code] = {
                "subcat": code,
                "short_name": "",
                "name": name,
                "unit": unit,
                "unit_conv": "UC_NONE",
                "override_unit": override_unit,
            }

    fieldnames = ["subcat", "short_name", "name", "unit", "unit_conv"]

    # Iterate over all sub tables.
    for filename, wmo_table in dict_tables.items():
        degrib_filename = os.path.join(degrib_table_input_dir, filename)

        # Ingest corresponding degrib subtable, if it exists
        if os.path.exists(degrib_filename):
            records = [None] * 256
            # The file is closed as soon as it is read
            with open(degrib_filename, newline="") as degrib_file:
                degrib_csv_reader = csv.DictReader(degrib_file)
                assert degrib_csv_reader.fieldnames == fieldnames
                for row in degrib_csv_reader:
                    code = int(row["subcat"])
                    assert records[code] is None
                    row["subcat"] = code
                    records[code] = row

            # Merge WMO and degrib subtable
            # degrib entries have precedence, unless there are reserved ranges
            # and WMO entry is not reserved
            for code, wmo_record in enumerate(wmo_table):
                if wmo_record is None:
                    continue
                degrib_record = records[code]
                if degrib_record is None or (
                    "Reserved" in degrib_record["name"]
                    and "Reserved" not in wmo_record["name"]
                ):
                    records[code] = wmo_record
                # The content of an auxiliary table always replaces the unit,
                # even for an entry kept from degrib
                if wmo_record["override_unit"]:
                    records[code]["unit"] = wmo_record["unit"]

        else:
            records = wmo_table

        # Write final subtable
        with open(os.path.join(output_dir, filename), "w", newline="") as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC
            )
            writer.writeheader()
            # Banner: the two warning lines framed by two rulers of "#"
            ruler = "#" * warning_len
            banner = (
                (-4, ruler),
                (-3, warning_msg),
                (-2, warning2_msg),
                (-1, ruler),
            )
            for subcat, text in banner:
                writer.writerow(
                    {
                        "subcat": subcat,
                        "short_name": text,
                        "name": "#",
                        "unit": "#",
                        "unit_conv": "#",
                    }
                )
            for row in records:
                if row:
                    # Restrict to the output columns, which drops the
                    # internal "override_unit" key of WMO records
                    writer.writerow({x: row[x] for x in fieldnames})


# Generate the merged 4.2 subtables when the script is run
process_table_4_2()
